import os
import sys

# Input: ref_locus_master.csv
# A txt or csv file with one header row followed by one row per locus, with
# the columns: STR_Locus_ID, Seq including TAs, unit number, unit sequence,
# unit size, STR start (bp into seq where STR starts).
#
# Output: one FASTA file per locus containing 100 records. Record N keeps the
# sequence before and after the STR tract and replaces the tract itself with
# N copies of the repeat unit (N = 1..100).

# open() does not expand "~" on its own, so resolve the home directory first.
master_path = os.path.expanduser("~/Repeats/MIPSTR_analysis/ref_locus_master.csv")

with open(master_path, "r") as myfile:
    # The first line holds the column heads; read it only to skip past it.
    headers = myfile.readline()

    for line in myfile:
        # Tolerate blank lines (e.g. a trailing empty line at end of file).
        if not line.strip():
            continue

        info = line.split(",")
        #info = line.split() - use if txt file instead of csv
        MIP_name = info[0]
        seq = info[1]
        copy_number = int(info[2])
        unit_seq = info[3]
        unit_size = int(info[4])
        str_start = int(info[5])  # int() ignores the trailing newline

        # str_start is used as a 0-based slice index into seq; the tract is
        # taken to span unit_size * copy_number bases from there.
        str_end = str_start + (unit_size * copy_number)
        pre_rep_seq = seq[0:str_start]
        post_rep_seq = seq[str_end:]

        # Output a fasta file for each locus.
        # NOTE(review): this path is absolute ("/Repeats/...") while the input
        # lives under "~/Repeats/..." - confirm that the difference is intended.
        out_path = "/Repeats/MIPSTR_analysis/100_repeat_reference_seqs/ref_%s.fasta" % (MIP_name)
        with open(out_path, "w") as refseq:
            for ct in range(1, 101):
                refseq.write(">" + MIP_name + "_" + str(ct) + "\n")
                refseq.write(pre_rep_seq + unit_seq * ct + post_rep_seq + "\n")
